---
title: "Identifying Price Informativeness"
author: "Eduardo Davila^[Yale] & Cecilia Parlatore^[NYU Stern]"
date: "`r Sys.Date()`"
output:
  html_document: default
  pdf_document:  default
---

```{r echo=FALSE, include=FALSE}

library(here); library(tidyverse); library(DescTools)
# Locate the project root with `here` and make it the working directory, so
# that the relative paths used in this chunk resolve from there.
path <- here::here()
print(path)
setwd(path)
rm(path)

# Load previously saved R objects from the `intermediate/` folder.
# NOTE(review): presumably these provide `merged_q`, `N_q`, `q_lag`, `tenors`,
# `normalize_payoff` and `normalize_method` used further down — confirm.
load("intermediate/dataset_merged.RData")
load("intermediate/parameters.RData")

```

# Restricting observations and constructing variables

Generate payoff variables that match the timing in the paper. The variables *mcap* and *payoff* are expressed in millions of dollars.

We restrict the datasets so that each `permno` has at least `N_a` observations in the annual data and at least `N_q` observations in the quarterly data.

```{r}
  
#' Build the price-change / payoff-growth panel for one tenor
#'
#' Defines `payoff`, cleans and winsorizes the data within each `permno`,
#' computes the log-price change together with current and future payoff
#' growth, and keeps only the `permno` groups with enough complete rows.
#'
#' @param input_df Data frame with at least `permno`, `year`, `month`,
#'   `adjprc` and `ebit`; additionally `lag1_book` when `normalize` is TRUE,
#'   or `ebit_change` and `lag4_book` when `normalize_method == "numerator"`.
#' @param N Minimum number of complete rows a `permno` must retain.
#' @param tenor Horizon multiplier: the future payoff growth is read
#'   `tenor * lags` rows ahead.
#' @param lags Number of rows used for the price and payoff differences.
#' @param normalize If TRUE, scale the payoff by lagged book value.
#' @param normalize_method `"numerator"` selects
#'   `log(1 + ebit_change / lag4_book)` as the payoff, which is then used
#'   directly as the growth measure; any other value selects
#'   `ebit / lag1_book`.
#' @return An ungrouped data frame with `delta_log_price`,
#'   `delta_log_payoff_growth` and `delta_log_payoff_growth_future` moved
#'   right after `permno`, `year`, `month`, containing no NA in those three
#'   variables.
fn_payoff <- function(input_df, N, tenor=1, lags=4, normalize=TRUE, normalize_method=""){

  # Scalar condition, so use the short-circuit `&&` rather than elementwise `&`.
  use_numerator <- normalize && normalize_method == "numerator"

  # Payoff definition.
  if (use_numerator) {
    df <- input_df %>% mutate(payoff = log(1 + (ebit_change / lag4_book)))
  } else if (normalize) {
    df <- input_df %>% mutate(payoff = ebit / lag1_book)
  } else {
    df <- input_df %>% mutate(payoff = ebit)
  }

  # Per-permno cleaning: drop groups with any infinite price, drop groups whose
  # payoff is identically zero, then drop rows with missing payoff.
  # The grouping by permno stays in place until the final ungroup().
  df <- df %>%
    group_by(permno) %>%
    filter(!any(is.infinite(adjprc))) %>%
    filter(!all(payoff == 0)) %>%
    filter(!is.na(payoff))

  # Winsorize payoff and price at the within-permno 2.5% / 97.5% quantiles.
  df <- df %>%
    mutate(payoff = Winsorize(payoff, val = quantile(payoff, probs = c(0.025, 0.975), na.rm = TRUE)),
           adjprc = Winsorize(adjprc, val = quantile(adjprc, probs = c(0.025, 0.975), na.rm = TRUE)))

  # NOTE(review): lag()/lead() shift by row position within permno, so this
  # assumes rows are time-ordered and evenly spaced once NA-payoff rows have
  # been dropped — confirm against the upstream merge.
  df <- df %>%
    mutate(log_price       = log(adjprc),
           delta_log_price = log_price - dplyr::lag(log_price, n = lags, default = NA))

  if (use_numerator) {
    # The payoff is already a log growth rate; use it directly.
    df <- df %>%
      mutate(delta_log_payoff_growth        = payoff,
             delta_log_payoff_growth_future = dplyr::lead(payoff, n = tenor * lags, default = NA))
  } else {
    # Growth relative to the lagged payoff, scaled by |lagged payoff| so the
    # sign reflects the direction of change even when the base is negative;
    # NA when the lagged payoff is zero or missing.
    df <- df %>%
      mutate(payoff_lag              = dplyr::lag(payoff, n = lags, default = NA),
             delta_log_payoff_growth = ifelse(
               payoff_lag > 0,
               (payoff / payoff_lag) - 1,
               ifelse(payoff_lag < 0, payoff / abs(payoff_lag) + 1, NA)
             ),
             delta_log_payoff_growth_future = dplyr::lead(delta_log_payoff_growth, n = tenor * lags, default = NA)) %>%
      select(-payoff_lag)
  }

  # Put identifiers and the constructed variables first, keep complete rows
  # only, and require at least N of them per permno.
  df %>%
    select(permno, year, month, delta_log_price,
           delta_log_payoff_growth, delta_log_payoff_growth_future,
           everything()) %>%
    filter(!is.na(delta_log_price)) %>%
    filter(!is.na(delta_log_payoff_growth)) %>%
    filter(!is.na(delta_log_payoff_growth_future)) %>%
    filter(n() >= N) %>%
    ungroup()
}

#' Build the price-change / payoff-growth panel with forecast growth
#'
#' Same cleaning and growth construction as `fn_payoff`, but without the
#' future payoff growth. Instead it adds `forecast_growth` and
#' `future_forecast_growth` and requires both to be non-missing.
#'
#' @param input_df Data frame with at least `permno`, `year`, `month`,
#'   `adjprc`, `ebit`, `forecast_diff`, `future_forecast_diff` and
#'   `lag4_book`; additionally `lag1_book` when `normalize` is TRUE, or
#'   `ebit_change` when `normalize_method == "numerator"`.
#' @param N Minimum number of complete rows a `permno` must retain.
#' @param lags Number of rows used for the price and payoff differences.
#' @param normalize If TRUE, scale the payoff by lagged book value.
#' @param normalize_method `"numerator"` selects
#'   `log(1 + ebit_change / lag4_book)` as the payoff, which is then used
#'   directly as the growth measure; any other value selects
#'   `ebit / lag1_book`.
#' @return An ungrouped data frame with `delta_log_price` and
#'   `delta_log_payoff_growth` moved right after `permno`, `year`, `month`,
#'   containing no NA in those variables or in the two forecast-growth
#'   columns.
fn_payoff_unlearnable <- function(input_df, N, lags=4, normalize=TRUE, normalize_method=""){

  # Scalar condition, so use the short-circuit `&&` rather than elementwise `&`.
  use_numerator <- normalize && normalize_method == "numerator"

  # Payoff definition.
  if (use_numerator) {
    df <- input_df %>% mutate(payoff = log(1 + (ebit_change / lag4_book)))
  } else if (normalize) {
    df <- input_df %>% mutate(payoff = ebit / lag1_book)
  } else {
    df <- input_df %>% mutate(payoff = ebit)
  }

  # Forecast-based growth measures, always scaled by `lag4_book`.
  df <- df %>%
    mutate(forecast_growth        = log(1 + (forecast_diff / lag4_book)),
           future_forecast_growth = log(1 + (future_forecast_diff / lag4_book)))

  # Per-permno cleaning: drop groups with any infinite price, drop groups whose
  # payoff is identically zero, then drop rows with missing payoff.
  # The grouping by permno stays in place until the final ungroup().
  df <- df %>%
    group_by(permno) %>%
    filter(!any(is.infinite(adjprc))) %>%
    filter(!all(payoff == 0)) %>%
    filter(!is.na(payoff))

  # Winsorize payoff and price at the within-permno 2.5% / 97.5% quantiles.
  df <- df %>%
    mutate(payoff = Winsorize(payoff, val = quantile(payoff, probs = c(0.025, 0.975), na.rm = TRUE)),
           adjprc = Winsorize(adjprc, val = quantile(adjprc, probs = c(0.025, 0.975), na.rm = TRUE)))

  # NOTE(review): lag() shifts by row position within permno, so this assumes
  # rows are time-ordered and evenly spaced once NA-payoff rows have been
  # dropped — confirm against the upstream merge.
  df <- df %>%
    mutate(log_price       = log(adjprc),
           delta_log_price = log_price - dplyr::lag(log_price, n = lags, default = NA))

  if (use_numerator) {
    # The payoff is already a log growth rate; use it directly.
    df <- df %>%
      mutate(delta_log_payoff_growth = payoff)
  } else {
    # Growth relative to the lagged payoff, scaled by |lagged payoff| so the
    # sign reflects the direction of change even when the base is negative;
    # NA when the lagged payoff is zero or missing.
    df <- df %>%
      mutate(payoff_lag              = dplyr::lag(payoff, n = lags, default = NA),
             delta_log_payoff_growth = ifelse(
               payoff_lag > 0,
               (payoff / payoff_lag) - 1,
               ifelse(payoff_lag < 0, payoff / abs(payoff_lag) + 1, NA)
             )) %>%
      select(-payoff_lag)
  }

  # Put identifiers and the constructed variables first, keep complete rows
  # only, and require at least N of them per permno.
  df %>%
    select(permno, year, month, delta_log_price,
           delta_log_payoff_growth,
           everything()) %>%
    filter(!is.na(delta_log_price)) %>%
    filter(!is.na(forecast_growth)) %>%
    filter(!is.na(future_forecast_growth)) %>%
    filter(!is.na(delta_log_payoff_growth)) %>%
    filter(n() >= N) %>%
    ungroup()
}

# Baseline quarterly panel (default tenor).
df_q <- merged_q %>%
  fn_payoff(N_q, lags = q_lag,
            normalize = normalize_payoff, normalize_method = normalize_method)

# Quarterly panel that additionally carries the forecast-growth variables.
df_q_unlearnable <- merged_q %>%
  fn_payoff_unlearnable(N_q, lags = q_lag,
                        normalize = normalize_payoff, normalize_method = normalize_method)

# One panel per tenor, each tagged with the tenor it was built for, stacked in
# a single bind instead of growing a data frame with rbind() inside a loop.
df_q_tenors <- lapply(tenors, function(tenor) {
  df <- merged_q %>%
    fn_payoff(N_q, lags = q_lag, tenor = tenor,
              normalize = normalize_payoff, normalize_method = normalize_method)
  df$tenor <- tenor
  df
}) %>%
  dplyr::bind_rows()

# Anchor the output path at the project root: knitr restores the working
# directory at the end of every chunk, so a setwd() issued in an earlier chunk
# cannot be relied on here.
save(df_q, df_q_unlearnable, df_q_tenors,
     file = here::here("intermediate", "data_selected.RData"))

```
